# Copyright (C) 2013-2015 Ragpicker Developers.
# This file is part of Ragpicker Malware Crawler - http://code.google.com/p/malware-crawler/

import logging
import re

from core.abstracts import Crawler


# Yapsy supplies the IPlugin base class used by the plugin below; fail early
# with an actionable message when it is not installed.
try:
    from yapsy.IPlugin import IPlugin
except ImportError:
    # Call-style raise is valid on both Python 2 and Python 3; the old
    # "raise Class, 'message'" form is a SyntaxError on Python 3.
    raise ImportError('Yapsy (Yet Another Plugin System) is required to run this program : http://yapsy.sourceforge.net')

# Module-level logger shared by the crawler in this file.
log = logging.getLogger("MalwaredlCrawler")

class Malwaredl(IPlugin, Crawler):
    """Crawler plugin that collects hosts from the Malware Domain List feed.

    ``parse`` and ``storeURL`` are not defined in this class; presumably
    they are inherited from ``Crawler`` -- confirm against core.abstracts.
    """

    @staticmethod
    def _clean_token(tokens, index):
        """Return the cleaned token at ``index``, or None if it is missing.

        Cleaning un-escapes ``&amp;`` and strips the commas that separate
        the fields inside a feed description.
        """
        if index >= len(tokens):
            return None
        return tokens[index].replace('&amp;', '&').replace(',', '')

    def run(self):
        """Fetch the MDL XML feed and store one entry per description row.

        For each ``<description>`` element (except the first), the second
        whitespace-separated token is stored via ``self.storeURL``; when
        that token is ``-`` the fifth token is stored instead. Rows that
        are too short to contain the needed token are skipped with a
        warning rather than aborting the whole crawl.

        Returns:
            ``self.mapURL``, reset to an empty dict at the start of the run
            (presumably filled by ``storeURL`` -- confirm in Crawler).
        """
        self.mapURL = {}
        log.debug("Fetching from Malware Domain List")

        # parser
        soup = self.parse('http://www.malwaredomainlist.com/hostslist/mdl.xml')

        # The first <description> is dropped, as before (presumably it is
        # the feed's own description rather than an entry -- confirm).
        # Slicing, unlike ``del mdl[0]``, does not raise on an empty feed.
        rows = list(soup('description'))[1:]

        for row in rows:
            tokens = str(row).split()
            site = self._clean_token(tokens, 1)
            if site == '-':
                # No host given for this row: use the fifth token instead.
                site = self._clean_token(tokens, 4)
            if site is None:
                log.warning("Skipping malformed feed entry: %s", row)
                continue
            self.storeURL(site)

        log.info("Found %s urls", len(self.mapURL))

        return self.mapURL
